From 2c83a4f375bd283e2a6041a6c12e94304c7f6c25 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk[kaf24]" Date: Fri, 16 Jan 2004 19:29:56 +0000 Subject: [PATCH] bitkeeper revision 1.680 (40083bb4Kb4q4cb9z5eySYtCRnVh3A) maw_vd.patch --- .rootkeys | 8 + docs/VBD-HOWTO.txt | 273 +++++++++++++ tools/examples/add_vbd_to_dom.py | 51 +++ tools/examples/createlinuxdom.py | 33 +- tools/examples/list_vbds.py | 8 + tools/examples/remove_vbd_from_dom.py | 29 ++ tools/examples/vd_create.py | 36 ++ tools/examples/vd_delete.py | 20 + tools/examples/vd_format.py | 34 ++ tools/examples/vd_refresh.py | 31 ++ tools/xc/py/XenoUtil.py | 535 +++++++++++++++++++++++--- 11 files changed, 988 insertions(+), 70 deletions(-) create mode 100644 docs/VBD-HOWTO.txt create mode 100644 tools/examples/add_vbd_to_dom.py create mode 100644 tools/examples/list_vbds.py create mode 100644 tools/examples/remove_vbd_from_dom.py create mode 100644 tools/examples/vd_create.py create mode 100644 tools/examples/vd_delete.py create mode 100644 tools/examples/vd_format.py create mode 100644 tools/examples/vd_refresh.py diff --git a/.rootkeys b/.rootkeys index bee4577bcb..e590bafb93 100644 --- a/.rootkeys +++ b/.rootkeys @@ -7,6 +7,7 @@ 3f5ef5a2l4kfBYSQTUaOyyD76WROZQ README.CD 3f69d8abYB1vMyD_QVDvzxy5Zscf1A TODO 3f9e7d53iC47UnlfORp9iC1vai6kWw docs/Makefile +40083bb4LVQzRqA3ABz0__pPhGNwtA docs/VBD-HOWTO.txt 3fafbf11blCNItRsHe0UHwyu5CCDkw docs/Xeno-HOWTO 3f9e7d60PWZJeVh5xdnk0nLUdxlqEA docs/eps/xenlogo.eps 3f9e7d63lTwQbp2fnx7yY93epWS-eQ docs/figs/dummy @@ -39,13 +40,20 @@ 3e6377b24eQqYMsDi9XrFkIgTzZ47A tools/balloon/Makefile 3e6377d6eiFjF1hHIS6JEIOFk62xSA tools/balloon/README 3e6377dbGcgnisKw16DPCaND7oGO3Q tools/balloon/balloon.c +40083bb4_j61quzxosgZ19LUgLlgYw tools/examples/add_vbd_to_dom.py 3fbe2f12OPAkzIUtumU3wRAihnhocQ tools/examples/createlinuxdom.py 3fbe2f12dZbmXLlgQdMgkmnSUj23AQ tools/examples/destroydom.py +40083bb4lxCIf5HRu6fwWUyHCYOHKA tools/examples/list_vbds.py 3fbe2f12ltvweb13kBSsxqzZDAq4sg 
tools/examples/listdoms.py 3fca7700PVj36cZObaFZlQicRiw1pQ tools/examples/pincpu.py 3fd8bc48ww3aOqPhYjCr8KGulG0NQQ tools/examples/readxenconsolering.py +40083bb4zWkCUTHJKd1ApEOoPAuihg tools/examples/remove_vbd_from_dom.py 3fccbe068ov0YCxnk-2m4law19QMmA tools/examples/startdom.py 3fbe2f12Bnt8mwmr1ZCP6HWGS6yvYw tools/examples/stopdom.py +40083bb4LeyQyL-0riaV3UYDfHkl5g tools/examples/vd_create.py +40083bb4TmKs8pcFkOcJj1bKn3zcmg tools/examples/vd_delete.py +40083bb4u9Od6ujgect6mrxWfkk1pQ tools/examples/vd_format.py +40083bb4NhDpKiYTrebI3ZjX__oI_w tools/examples/vd_refresh.py 3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile 3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile 3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README diff --git a/docs/VBD-HOWTO.txt b/docs/VBD-HOWTO.txt new file mode 100644 index 0000000000..6a276a83b5 --- /dev/null +++ b/docs/VBD-HOWTO.txt @@ -0,0 +1,273 @@ +Virtual Block Devices / Virtual Disks in Xen - HOWTO +==================================================== + +HOWTO for Xen 1.2 + +Mark A. Williamson (mark.a.williamson@intel.com) +(C) Intel Research Cambridge 2004 + +Introduction +------------ + +This document describes the new Virtual Block Device (VBD) and Virtual Disk +features available in Xen release 1.2. First, a brief introduction to some +basic disk concepts on a Xen system: + +Virtual Block Devices (VBDs): + VBDs are the disk abstraction provided by Xen. All XenoLinux disk accesses + go through the VBD driver. Using the VBD functionality, it is possible + to selectively grant domains access to portions of the physical disks + in the system. + + A virtual block device can also consist of multiple extents from the + physical disks in the system, allowing them to be accessed as a single + uniform device from the domain with access to that VBD. + + Everyone who uses Xen / XenoLinux uses VBDs but for less advanced uses + they can almost be ignored. 
+ +Virtual Disks (VDs): + VDs are an abstraction built on top of the functionality provided by + VBDs. The VD management code maintains a "free pool" of disk space on + the system that has been reserved for use with VDs. The tools can + automatically allocate collections of extents from this free pool to + create "virtual disks" on demand. + + VDs can then be used just like normal disks by domains. VDs appear + just like any other disk to guest domains, since they use the same VBD + abstraction, as provided by Xen. + + Using VDs is optional, since it's always possible to dedicate + partitions, or entire disks to your virtual machines. VDs are handy + when you have a dynamically changing set of virtual machines and you + don't want to have to keep repartitioning in order to provide them with + disk space. + +If that didn't all make sense, it doesn't matter too much ;-) Using the +functionality is fairly straightforward and some examples will clarify things. +The text below expands a bit on the concepts involved, finishing up with a +walkthrough of some simple virtual disk management tasks. + + +Virtual Block Devices +--------------------- + +Before covering VD management, it's worth discussing some aspects of the VBD +functionality that will be useful to know. + +A VBD is made up of a number of extents from physical disk devices. The +extents for a VBD don't have to be contiguous, or even on the same device. Xen +performs address translation so that they appear as a single contiguous +device. + +When the VBD layer is used to give access to entire drives or entire +partitions, the VBDs simply consist of a single extent, corresponding to the +drive or partition used. When used with Virtual Disks, the extent list +functionality will be used (discussed later). + +Xen 1.2 and its associated XenoLinux release support automatic registration / +removal of VBDs. 
It has always been possible to add a VBD to a running +XenoLinux domain but it was then necessary to run "xen_vbd_refresh" in order +for the new device to be detected. Nowadays, when a VBD is added, the domain +it's added to automatically registers the disk. + +Note that it is possible to use the VBD functionality to allow multiple domains +write access to the same areas of disk. This is almost always a bad thing! + +The provided example script createlinuxdom.py does its best to check that disk +areas are not shared unsafely and will catch many cases of this. Setting the +vbd_expert variable in that script controls how unsafe it allows VBD mappings +to be - 0 should be right for most people ;-) + + +Virtual Disk Management +----------------------- + +The VD management code runs entirely in userspace. The code is written in +Python and can therefore be accessed from custom scripts, as well as from the +convenience scripts provided. The underlying VD database is a SQLite database +in /var/spool/xen_vdisks.sqlite. + +The scripts provided are as follows: + +vd_format.py - "Formats" a partition or disk device for use storing + virtual disks. This does not actually write data to the + specified device. Rather, it adds the device to the VD + free-space pool, for later allocation. + + You should only add devices that correspond directly to + physical disks / partitions - trying to use a VBD that you + have created yourself as part of the free space pool has + undefined (possibly nasty) results. + +vd_create.py - Creates a virtual disk of specified size by allocating space + from the free space pool. The virtual disk is identified + in future by the unique ID returned by this script. + + The disk can be given an expiry time, if desired. For + most users, the best idea is to specify a time of 0 (which + has the special meaning "never expire") and then + explicitly delete the VD when finished with it - + otherwise, VDs could disappear unexpectedly... 
+ +vd_refresh.py - Allows the expiry time of a (not yet expired) virtual disk to + be modified. Be aware the VD will disappear when the time has + expired. + +vd_delete.py - Explicitly delete a VD. Makes it disappear immediately. + + +The functionality provided by these scripts is also available directly from +Python functions in the XenoUtil module - you can use this functionality in +your own scripts. + +Populating VDs: + +Once you've created a VD, you might want to populate it from DOM0 (for +instance, to put a root filesystem onto it for a guest domain). This can be +done by dynamically creating a VBD - this is discussed below. + +More detail: + +When you use vd_format.py to add a device to the free space pool, the device is +logically split up into extents. These extents are recorded in the Virtual +Disk Management database in /var/spool/xen_vdisks.sqlite. + +When you use vd_create.py to create a virtual disk, some of the extents in +the free space pool are reallocated for that virtual disk and a record for that +VD is added to the database. When VDs are mapped into domains as VBDs, the +system looks up the allocated extents for the virtual disk in order to set up +the underlying VBD. + +Free space is identified by the fact that it belongs to an "expired" disk. +When vd_format.py adds a real device to the free pool, it actually divides it +into extents and adds them to an already-expired virtual disk. + +If you set an expiry time on a VD, its extents will be liable to be reallocated +to new VDs as soon as that expiry time runs out. Therefore, be careful when +setting expiry times. + +Finally, vd_delete.py can be used to delete virtual disks when they are no +longer needed. It works by causing them to expire immediately. + +Security note: + +The disk space for VDs is not zeroed when it is initially added to the free +space pool OR when a VD expires OR when a VD is created.
Therefore, if this is +not done manually it is possible for a domain to read a VD to determine what +was written by previous owners of its constituent extents. If this is a +problem, users should manually clean the VD in some way before allocating it. + + +Dynamically Registering VBDs +---------------------------- + +Two scripts are included to make it easy to add VDs to domains. + +add_vbd_to_dom.py - Creates a VBD corresponding to either a physical + device or a virtual disk and adds it as a specified + device under the target domain, with either read or + write access. + +remove_vbd_from_dom.py - Removes the VBD associated with a specified device + node from the target domain. + +These scripts are most useful when populating VDs. VDs can't be populated +directly, since they don't correspond to real devices. Using: + + add_vbd_to_dom.py vd:your_vd_id /dev/wherever 0 rw + +You can make a virtual disk available to DOM0. Sensible devices to map VDs to +in DOM0 are the /dev/xvd* nodes, since that makes it obvious that they are Xen +virtual devices that don't correspond to real physical devices. + +You can then format, mount and populate the VD through the nominated device +node. When you've finished, use: + + remove_vbd_from_dom.py /dev/wherever 0 + +To revoke DOM0's access to it. It's then ready for use in a guest domain. + + + +You can also use add_vbd_to_dom.py to grant access to a physical device to a +guest - you might use this to temporarily share a partition, or to add access +to a partition that wasn't granted at boot time. Again, remove_vbd_from_dom.py +allows you to revoke access. + +When playing with VBDs, remember that in general, it is only safe for two +domains to have access to a filesystem if they both have read-only access. You +shouldn't be trying to share anything which is writeable, even if only by one +domain, unless you're really sure you know what you're doing!
+ + + +Walkthrough: Booting a domain from a VD +--------------------------------------- + +As an example, here is a sequence of commands you might use to create a virtual +disk, populate it with a root filesystem and boot a domain from it. These +steps assume that you've installed the example scripts somewhere on your PATH - +if you haven't done that, you'll need to specify a fully qualified pathname in +the examples below. The steps also assume that you know how to use the +createlinuxdom.py script provided and have already set it up for your local +configuration, apart from the virtual disks info. + +First, if you haven't done so already, you'll initialise the free space pool by +adding a real partition to it. The details are stored in the database, so +you'll only need to do it once. You can also use this command to add further +partitions to the existing free space pool. + +> vd_format.py /dev/your_partition + +Now you'll want to allocate the space for your virtual disk. Do so using the +following, specifying the size in megabytes. + +> vd_create.py size_in_megabytes + +At this point, the vd_create.py program will tell you the virtual disk ID. +Note it down, as it is how you will identify the virtual device in future. + +If you don't want the VD to be bootable (i.e. you're booting a domain from some +other medium and just want it to be able to access this VD), you can simply add +it to the vbds list in your custom createlinuxdom.py (step 5) and then run that +script. Any formatting / populating of the VD can be done from that domain. + +If you want to boot off your new VD as well then you need to populate it with a +standard Linux root filesystem. You'll need to temporarily add the VD to DOM0 +in order to do this. To give DOM0 r/w access to the VD, use the following +command line, substituting the ID you got earlier.
+ +> add_vbd_to_dom.py vd:your_vd_id /dev/xvda 0 rw + +This attaches the VD to the device /dev/xvda - you can use other devices if you +choose to but with the xvd* devices it's obvious you're using a virtual device. + +Now make a filesystem on this device, mount it and populate it with a root +filesystem. These steps are exactly the same as under normal Linux. When +you've finished, unmount the filesystem again. + +You should now remove the VD from DOM0. This will prevent you accidentally +changing it in DOM0, whilst the guest domain is using it. + +> remove_vbd_from_dom.py /dev/xvda 0 + +It should now be possible to boot a guest domain from the VD. To do this, you +should add the VD's details to the vbds list in step 5 of createlinuxdom.py and +set the value of rootbit in step 6. For instance, you might add: + +('vd:your_vd_id', '/dev/xvda', 'w') + +To the vbds list in step 5 - this gives the domain writeable access to the VD +as if it were the domain's /dev/xvda. + +Then you would set: + +rootbit = "root=/dev/xvda ro" + +In step 6 to tell the kernel where the root filesystem is. + + + +Once these variables are set, you can run createlinuxdom.py to start your new +domain. diff --git a/tools/examples/add_vbd_to_dom.py b/tools/examples/add_vbd_to_dom.py new file mode 100644 index 0000000000..629ca40617 --- /dev/null +++ b/tools/examples/add_vbd_to_dom.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +# Used to map a VBD into a domain's device space. Useful for populating a new +# VBD with data from DOM0 before starting a new domain using it, for instance. + +import Xc, XenoUtil, sys + +XenoUtil.VBD_EXPERT_LEVEL = 0 # sets the allowed level of potentially unsafe mappings + +def usage(): + print >>sys.stderr,"""Usage: add_vdisk_to_dom.py uname target-dev target-dom [perms] + uname - the uname of the source device, e.g.
vd:2341 or phy:hda3 + target-dev - the device node to map the VBD to + target-dom - domain to add the new VBD to + perms - optionally specify 'r', or 'rw' (default is 'r') + """ + sys.exit(1) + +xc = Xc.new() + +if not 4 <= len(sys.argv) <= 5: + print len(sys.argv) + usage() + +writeable = 0 + +if len(sys.argv) == 5: + if sys.argv[4] == 'rw': + writeable = 1; + else: + if sys.argv[4] != 'r': + usage() + +segments = XenoUtil.lookup_disk_uname(sys.argv[1]) + +if XenoUtil.vd_extents_validate(segments,writeable) < 0: + print "That mapping is too unsafe for the current VBD expertise level" + sys.exit(1) + +virt_dev = XenoUtil.blkdev_name_to_number(sys.argv[2]) + +target_dom = int(sys.argv[3]) + +xc.vbd_create(target_dom,virt_dev,writeable) + +if xc.vbd_setextents( target_dom, virt_dev, segments ): + print "Error populating VBD vbd=%d\n" % virt_dev + sys.exit(1) + + +print "Added " + sys.argv[1] + " to domain " + sys.argv[3] + " as device " + sys.argv[2] diff --git a/tools/examples/createlinuxdom.py b/tools/examples/createlinuxdom.py index 1f54841cd3..31b8aece81 100755 --- a/tools/examples/createlinuxdom.py +++ b/tools/examples/createlinuxdom.py @@ -51,6 +51,12 @@ vbds = [ ('phy:sda%d'%(7+guestid),'sda1','w' ), ('phy:sda6','sda6','r'), ('phy:cdrom','hdd','r') ] +# STEP 5b. Set the VBD expertise level. Most people should leave this +# on 0, at least to begin with - this script can detect most dangerous +# disk sharing between domains and with this set to zero it will only +# allow read only sharing. +vbd_expert = 0 + # STEP 6. Build the command line for the new domain. Edit as req'd. # You only need the ip= line if you're NFS booting or the root file system # doesn't set it later e.g. 
in ifcfg-eth0 or via DHCP @@ -96,7 +102,7 @@ def make_domain(): # set up access to the global variables declared above global image, memory_megabytes, domain_name, ipaddr, netmask - global vbds, cmdline, xc + global vbds, cmdline, xc, vbd_expert if not os.path.isfile( image ): print "Image file '" + image + "' does not exist" @@ -116,6 +122,10 @@ def make_domain(): sys.exit() # setup the virtual block devices + + # set the expertise level appropriately + XenoUtil.VBD_EXPERT_MODE = vbd_expert + for ( uname, virt_name, rw ) in vbds: virt_dev = XenoUtil.blkdev_name_to_number( virt_name ) @@ -125,20 +135,23 @@ def make_domain(): xc.domain_destroy ( dom=id ) sys.exit() + # check that setting up this VBD won't violate the sharing + # allowed by the current VBD expertise level + if XenoUtil.vd_extents_validate(segments, rw=='w') < 0: + xc.domain_destroy( dom = id ) + sys.exit() + if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' ): print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw) xc.domain_destroy ( dom=id ) sys.exit() - for (s_dev,s_start,s_len,s_type) in segments: - if xc.vbd_grow( dom=id, - vbd=virt_dev, - device=s_dev, - start_sector=s_start, - nr_sectors=s_len ): - print "Error populating VBD vbd=%d\n" % virt_dev - xc.domain_destroy ( dom=id ) - sys.exit() + if xc.vbd_setextents( dom=id, + vbd=virt_dev, + extents=segments): + print "Error populating VBD vbd=%d\n" % virt_dev + xc.domain_destroy ( dom=id ) + sys.exit() # setup virtual firewall rules for all aliases for ip in ipaddr: diff --git a/tools/examples/list_vbds.py b/tools/examples/list_vbds.py new file mode 100644 index 0000000000..de13c5101c --- /dev/null +++ b/tools/examples/list_vbds.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python + + +import Xc, sys + +xc = Xc.new() + +print xc.vbd_probe() diff --git a/tools/examples/remove_vbd_from_dom.py b/tools/examples/remove_vbd_from_dom.py new file mode 100644 index 0000000000..0db60f2be8 --- /dev/null +++ b/tools/examples/remove_vbd_from_dom.py @@ 
-0,0 +1,29 @@ +#!/usr/bin/env python + +# Used to map a VBD into a domain's device space. Useful for populating a new +# VBD with data from DOM0 before starting a new domain using it, for instance. + +# Usage: add_vdisk_to_dom.py uname target-dev-name target-dom-number +# uname - the uname of the device, e.g. vd:2341 or phy:hda3 +# target-dev-name - the device node to map the VBD to +# target-dom-number - domain to add the new VBD to + +import Xc, XenoUtil, sys + +xc = Xc.new() + +if len(sys.argv) != 3: + print >>sys.stderr,"""Usage: add_vdisk_to_dom.py target-dev target-dom + target-dev - the device node the VBD is mapped to + target-dom - domain to remove the VBD from""" + sys.exit(1) + +virt_dev = XenoUtil.blkdev_name_to_number(sys.argv[1]) + +target_dom = int(sys.argv[2]) + +if not xc.vbd_destroy(target_dom,virt_dev): + print "Removed " + sys.argv[1] + " from domain " + sys.argv[2] +else: + print "Failed" + sys.exit(1) diff --git a/tools/examples/vd_create.py b/tools/examples/vd_create.py new file mode 100644 index 0000000000..316e787ccd --- /dev/null +++ b/tools/examples/vd_create.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +# +# Wrapper script for creating a virtual disk. +# +# Usage: vd_create.py size [expiry] +# + +import XenoUtil, sys + +if len(sys.argv) > 1: + size = int(sys.argv[1]) +else: + print "Usage: " + sys.argv[0] + """ size [expiry] + Allocates a Virtual Disk out of the free space pool. 
An expiry time + can be specified in seconds from now (0 means never expire) - the default + is for disks to never expire.""" + sys.exit(1) + +if len(sys.argv) > 2: + expiry_time = int(sys.argv[2]) +else: + print "No expiry time specified - using default\n" + expiry_time = 0 + +print "Creating a virtual disk" +print "Size: %d" % size +print "Expiry time (seconds from now): %d" % expiry_time + +ret = XenoUtil.vd_create(size, expiry_time) + +if ret < 0: + print >> sys.stderr, "An error occurred creating the the disk" + sys.exit(ret) +else: + print "Virtual disk allocated, with ID: " + ret diff --git a/tools/examples/vd_delete.py b/tools/examples/vd_delete.py new file mode 100644 index 0000000000..ad7a210b14 --- /dev/null +++ b/tools/examples/vd_delete.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +# +# Wrapper script for deleting a virtual disk. +# +# Usage: vd_create.py id +# + +import sys, XenoUtil + +if len(sys.argv) > 1: + id = sys.argv[1] +else: + print "Usage: " + sys.argv[0] + """ id + Deletes a virtual disk.""" + sys.exit(1) + +print "Deleting a virtual disk with ID: " + id + +ret = XenoUtil.vd_delete(id) diff --git a/tools/examples/vd_format.py b/tools/examples/vd_format.py new file mode 100644 index 0000000000..d7402be996 --- /dev/null +++ b/tools/examples/vd_format.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +# +# Wrapper script for formatting a device to host Xen virtual disk extents +# +# Usage: vd_format.py device [extent_size] +# + +import sys, XenoUtil + +if len(sys.argv) > 1: + device = sys.argv[1] +else: + print "Usage: " + sys.argv[0] + """ device [extent_size] + Formats a device to host Xen virtual disk extents. 
The extent size can + optionally be specified in megabytes (default 64MB).""" + sys.exit(1) + +if len(sys.argv) > 2: + extent_size = int(sys.argv[2]) +else: + print """No extent size specified - using default size + (for really small devices, the default size of 64MB might not work)""" + extent_size = 64 + +print "Formatting for virtual disks" +print "Device: " + sys.argv[1] +print "Extent size: " + str(extent_size) + "MB" + +ret = XenoUtil.vd_format(device, extent_size) + +if ret: + print >> sys.stderr, "An error occurred formatting the device" + sys.exit(ret) diff --git a/tools/examples/vd_refresh.py b/tools/examples/vd_refresh.py new file mode 100644 index 0000000000..cfe7f4a2aa --- /dev/null +++ b/tools/examples/vd_refresh.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +# +# Wrapper script for creating a virtual disk. +# +# Usage: vd_refresh.py id [new-expiry] +# + +import sys, XenoUtil + +if len(sys.argv) > 1: + id = sys.argv[1] +else: + print "Usage: " + sys.argv[0] + """ ID [expiry-new] + Refreshes a Virtual Disk expiry time. An expiry time + can be specified in seconds from now (0 means never expire) - the default + is for disks to never expire.""" + sys.exit(1) + +if len(sys.argv) > 2: + expiry_time = int(sys.argv[2]) +else: + print "No expiry time specified - using default\n" + expiry_time = 0 + +print "Refreshing a virtual disk" +print "Id: " + sys.argv[1] +print "Expiry time (seconds from now): " + sys.argv[2] + +ret = XenoUtil.vd_refresh(id, expiry_time) + diff --git a/tools/xc/py/XenoUtil.py b/tools/xc/py/XenoUtil.py index 81fca35e47..c37aaaf8f2 100644 --- a/tools/xc/py/XenoUtil.py +++ b/tools/xc/py/XenoUtil.py @@ -1,67 +1,22 @@ +import string, re, os, sqlite, Xc, sys -import string, re, os +##### Module variables -def blkdev_name_to_number(name): - """Take the given textual block-device name (e.g., '/dev/sda1', - 'hda') and return the device number used by the OS. 
""" - - if not re.match( '/dev/', name ): - name = '/dev/' + name - - return os.stat(name).st_rdev - - -# lookup_blkdev_partn_info( '/dev/sda3' ) -def lookup_raw_partn(partition): - """Take the given block-device name (e.g., '/dev/sda1', 'hda') - and return a information tuple ( partn-dev, disc-dev, start-sect, - nr-sects, type ) - partn-dev: Device number of the given partition - disc-dev: Device number of the disc containing the partition - start-sect: Index of first sector of the partition - nr-sects: Number of sectors comprising this partition - type: 'Disk' or identifying name for partition type - """ +"""Location of the Virtual Disk management database. + defaults to /var/spool/xen_vdisks.sqlite +""" +VD_DB_FILE = "/var/spool/xen_vdisks.sqlite" - if not re.match( '/dev/', partition ): - partition = '/dev/' + partition +"""VBD expertise level - determines the strictness of the sanity checking. + This mode determines the level of complaints when disk sharing occurs + through the current VBD mappings. 
+ 0 - only allow shared mappings if both domains have r/o access (always OK) + 1 - also allow sharing with one dom r/w and the other r/o + 2 - allow sharing with both doms r/w +""" +VBD_EXPERT_MODE = 0 - drive = re.split( '[0-9]', partition )[0] - - if drive == partition: - fd = os.popen( '/sbin/sfdisk -s ' + drive + ' 2>/dev/null' ) - line = fd.readline() - if line: - return [( blkdev_name_to_number(drive), - 0, - string.atol(line) * 2, - 'Disk' )] - return None - - # determine position on disk - fd = os.popen( '/sbin/sfdisk -d ' + drive + ' 2>/dev/null' ) - - #['/dev/sda3 : start= 16948575, size=16836120, Id=83, bootable\012'] - lines = fd.readlines() - for line in lines: - m = re.search( '^' + partition + '\s*: start=\s*([0-9]+), ' + - 'size=\s*([0-9]+), Id=\s*(\S+).*$', line) - if m: - return [( blkdev_name_to_number(drive), - string.atol(m.group(1)), - string.atol(m.group(2)), - m.group(3) )] - return None - -def lookup_disk_uname( uname ): - ( type, d_name ) = string.split( uname, ':' ) - - if type == "phy": - segments = lookup_raw_partn( d_name ) - elif type == "vd": - segments = lookup_vd( d_name ) - - return segments +##### Networking-related functions def get_current_ipaddr(dev='eth0'): """Return a string containing the primary IP address for the given @@ -134,3 +89,463 @@ def add_offset_to_ip( ip, off ): return '%d.%d.%d.%d' % ( ((a>>24)&0xff), ((a>>16)&0xff), ((a>>8)&0xff), (a&0xff) ) +##### VBD-related Functions + +def blkdev_name_to_number(name): + """Take the given textual block-device name (e.g., '/dev/sda1', + 'hda') and return the device number used by the OS. 
""" + + if not re.match( '/dev/', name ): + name = '/dev/' + name + + return os.stat(name).st_rdev + +# lookup_blkdev_partn_info( '/dev/sda3' ) +def lookup_raw_partn(partition): + """Take the given block-device name (e.g., '/dev/sda1', 'hda') + and return a dictionary { partn-dev, start-sect, + nr-sects, type } + device: Device number of the given partition + start_sector: Index of first sector of the partition + nr_sectsors: Number of sectors comprising this partition + type: 'Disk' or identifying name for partition type + """ + + if not re.match( '/dev/', partition ): + partition = '/dev/' + partition + + drive = re.split( '[0-9]', partition )[0] + + if drive == partition: + fd = os.popen( '/sbin/sfdisk -s ' + drive + ' 2>/dev/null' ) + line = fd.readline() + if line: + return [ { 'device' : blkdev_name_to_number(drive), + 'start_sector' : 0, + 'nr_sectors' : string.atol(line) * 2, + 'type' : 'Disk' } ] + return None + + # determine position on disk + fd = os.popen( '/sbin/sfdisk -d ' + drive + ' 2>/dev/null' ) + + #['/dev/sda3 : start= 16948575, size=16836120, Id=83, bootable\012'] + lines = fd.readlines() + for line in lines: + m = re.search( '^' + partition + '\s*: start=\s*([0-9]+), ' + + 'size=\s*([0-9]+), Id=\s*(\S+).*$', line) + if m: + return [ { 'device' : blkdev_name_to_number(drive), + 'start_sector' : string.atol(m.group(1)), + 'nr_sectors' : string.atol(m.group(2)), + 'type' : m.group(3) } ] + + return None + +def lookup_disk_uname( uname ): + """Lookup a list of segments for either a physical or a virtual device. 
+ uname [string]: name of the device in the format \'vd:id\' for a virtual + disk, or \'phy:dev\' for a physical device + returns [list of dicts]: list of extents that make up the named device + """ + ( type, d_name ) = string.split( uname, ':' ) + + if type == "phy": + segments = lookup_raw_partn( d_name ) + elif type == "vd": + segments = vd_lookup( d_name ) + + return segments + + + +##### VD Management-related functions + + + +def __vd_no_database(): + """Called when no database found - exits with an error + """ + print >> sys.stderr, "ERROR: Could not locate the database file at " + VD_DB_FILE + sys.exit(1) + +def vd_format(partition, extent_size_mb): + """Format a partition or drive for use a virtual disk storage. + partition [string]: device file representing the partition + extent_size_mb [string]: extent size in megabytes to use on this disk + """ + + if not os.path.isfile(VD_DB_FILE): + vd_init_db(VD_DB_FILE) + + if not re.match( '/dev/', partition ): + partition = '/dev/' + partition + + cx = sqlite.connect(VD_DB_FILE) + cu = cx.cursor() + + cu.execute("select * from vdisk_part where partition = \'" + + partition + "\'") + row = cu.fetchone() + + extent_size = extent_size_mb * 2048 # convert megabytes to sectors + + if not row: + part_info = lookup_raw_partn(partition)[0] + + cu.execute("INSERT INTO vdisk_part(partition, part_id, extent_size) " + + "VALUES ( \'" + partition + "\', " + str(part_info['device']) + + ", " + str(extent_size) + ")") + + + cu.execute("SELECT max(vdisk_extent_no) FROM vdisk_extents " + + "WHERE vdisk_id = 0") + + max_id, = cu.fetchone() + + if max_id != None: + new_id = max_id + 1 + else: + new_id = 0 + + for i in range(part_info['nr_sectors'] / extent_size): + sql ="""INSERT INTO vdisk_extents(vdisk_extent_no, vdisk_id, + part_id, part_extent_no) + VALUES ("""+ str(new_id + i) + ", 0, "\ + + str(part_info['device']) + ", " + str(i) + ")" + cu.execute(sql) + + cx.commit() + cx.close() + return 0 + +def vd_create(size_mb, 
expiry): + """Create a new virtual disk. + size_mb [int]: size in megabytes for the new virtual disk + expiry [int]: expiry time in seconds from now + """ + + if not os.path.isfile(VD_DB_FILE): + __vd_no_database() + + cx = sqlite.connect(VD_DB_FILE) + cu = cx.cursor() + + size = size_mb * 2048 + + cu.execute("SELECT max(vdisk_id) FROM vdisks") + max_id, = cu.fetchone() + new_id = int(max_id) + 1 + + # fetch a list of extents from the expired disks, along with information + # about their size + cu.execute("""SELECT vdisks.vdisk_id, vdisk_extent_no, part_extent_no, + vdisk_extents.part_id, extent_size + FROM vdisk_extents NATURAL JOIN vdisks + NATURAL JOIN vdisk_part + WHERE expires AND expiry_time < datetime('now') + ORDER BY expiry_time asc, vdisk_extent_no desc + """) # aims to reuse the last extents + # from the longest-expired disks first + + allocated = 0 + + if expiry: + expiry_ts = "datetime('now', '" + str(expiry) + " seconds')" + expires = 1; + else: + expiry_ts = "NULL" + expires = 0; + + # we'll use this to build the SQL statement we want + building_sql = "INSERT INTO vdisks(vdisk_id, size, expires, expiry_time)" \ + +" VALUES ("+str(new_id)+", "+str(size)+ ", " \ + + str(expires) + ", " + expiry_ts + "); " + + counter = 0 + + while allocated < size: + row = cu.fetchone() + if not row: + cx.close() + return -1 + + (vdisk_id, vdisk_extent_no, part_extent_no, part_id, extent_size) = row + allocated += extent_size + building_sql += "UPDATE vdisk_extents SET vdisk_id = " + str(new_id) \ + + ", " + "vdisk_extent_no = " + str(counter) \ + + " WHERE vdisk_extent_no = " + str(vdisk_extent_no) \ + + " AND vdisk_id = " + str(vdisk_id) + "; " + + counter += 1 + + + # this will execute the SQL query we build to store details of the new + # virtual disk and allocate space to it print building_sql + cu.execute(building_sql) + + cx.commit() + cx.close() + return str(new_id) + + +# Future work: Disk sizes aren't modified when vd_create scavenges extents from +# expired 
# disks.  As a result it is possible to check if a disk is expired but
# intact (assuming VD IDs are not reused) - could allow recovery when people
# mess up.

def vd_lookup(id):
    """Lookup a Virtual Disk by ID.
    id [string]: a virtual disk identifier (decimal integer)
    Returns [list of dicts]: a list of extents as dicts, contain fields:
        device       : Linux device number
        start_sector : within the device
        nr_sectors   : size of this extent
        type         : set to 'VD Extent'
    The list is empty if no unexpired disk with this ID has extents.
    Raises ValueError if id is not an integer.
    """

    if not os.path.isfile(VD_DB_FILE):
        __vd_no_database()

    # Virtual disk IDs are integers (vd_create allocates max(vdisk_id) + 1).
    # Coercing here keeps arbitrary caller-supplied text out of the SQL
    # statement that is assembled by concatenation below.
    vdisk_id = int(id)

    cx = sqlite.connect(VD_DB_FILE)
    try:
        cu = cx.cursor()

        # This tells PySQLite how to convert the columns returned by the
        # following query - the multiplication confuses it otherwise.
        # The row is significant to PySQLite but is syntactically an SQL
        # comment.
        cu.execute("-- types int, int, int")

        # Columns are selected in the order (device, start, size) so each
        # row can be turned directly into an extent dict.  Only disks that
        # never expire or have not yet expired are matched.
        cu.execute("""SELECT vdisk_extents.part_id,
                             round(part_extent_no * extent_size) as start,
                             extent_size

                      FROM vdisk_extents NATURAL JOIN vdisks
                                         NATURAL JOIN vdisk_part

                      WHERE (expiry_time > datetime('now') OR not expires)
                      AND vdisk_extents.vdisk_id = """ + str(vdisk_id))

        rows = cu.fetchall()
        cx.commit()
    finally:
        # release the connection even if one of the queries raised
        cx.close()

    # map the result tuples into the dict format used for extents by the
    # rest of the code
    return [{'device': device, 'start_sector': int(start_sector),
             'nr_sectors': nr_sectors, 'type': 'VD Extent'}
            for (device, start_sector, nr_sectors) in rows]


def vd_refresh(id, expiry):
    """Change the expiry time of a virtual disk.
    id [string]: a virtual disk identifier (decimal integer)
    expiry [int]: expiry time in seconds from now (0 = never expire)
    Raises ValueError if id or a non-zero expiry is not an integer.
    """

    if not os.path.isfile(VD_DB_FILE):
        __vd_no_database()

    # Both values end up inside SQL text, so force them to integers first.
    vdisk_id = int(id)

    if expiry:
        expires = 1
        expiry_ts = "datetime('now', '" + str(int(expiry)) + " seconds')"
    else:
        expires = 0
        expiry_ts = "NULL"

    cx = sqlite.connect(VD_DB_FILE)
    try:
        cu = cx.cursor()
        cu.execute("UPDATE vdisks SET expires = " + str(expires)
                   + ", expiry_time = " + expiry_ts
                   + " WHERE vdisk_id = " + str(vdisk_id))
        cx.commit()
    finally:
        cx.close()

    return


def vd_delete(id):
    """Deletes a Virtual Disk, making its extents available for future
    virtual disks.
    id [string]: identifier (decimal integer) of the virtual disk to delete
    Raises ValueError if id is not an integer.
    """

    if not os.path.isfile(VD_DB_FILE):
        __vd_no_database()

    vdisk_id = int(id)

    cx = sqlite.connect(VD_DB_FILE)
    try:
        cu = cx.cursor()
        # A disk is "deleted" by marking it as expired right now; the row and
        # its extents stay in place until vd_create reuses the extents.
        cu.execute("UPDATE vdisks SET expires = 1, "
                   + "expiry_time = datetime('now')"
                   + " WHERE vdisk_id = " + str(vdisk_id))
        cx.commit()
    finally:
        cx.close()

    return


def vd_init_db(path):
    """Initialise the VD SQLite database
    path [string]: path to the SQLite database file
    On success the module-level VD_DB_FILE is pointed at the new database.
    """

    # Without this declaration the assignment at the end of the function
    # would only bind a local name and the module setting would be unchanged.
    global VD_DB_FILE

    cx = sqlite.connect(path)
    try:
        cu = cx.cursor()

        cu.execute(
            """CREATE TABLE vdisk_extents
                               ( vdisk_extent_no INT,
                                 vdisk_id INT,
                                 part_id INT,
                                 part_extent_no INT )
            """)

        cu.execute(
            """CREATE TABLE vdisk_part
                               ( part_id INT,
                                 partition VARCHAR,
                                 extent_size INT )
            """)

        cu.execute(
            """CREATE TABLE vdisks
                               ( vdisk_id INT,
                                 size INT,
                                 expires BOOLEAN,
                                 expiry_time TIMESTAMP )
            """)

        # Seed the table with an already-expired, zero-sized disk 0 so that
        # max(vdisk_id) in vd_create has a row to work from.
        cu.execute(
            """INSERT INTO vdisks ( vdisk_id, size, expires, expiry_time )
                           VALUES ( 0,        0,     1,       datetime('now') )
            """)

        cx.commit()
    finally:
        cx.close()

    VD_DB_FILE = path


def vd_extents_validate(new_extents,new_writeable):
    """Validate the extents against the existing extents.
    Complains if the list supplied clashes against the extents that
    are already in use in the system.
    new_extents [list of dicts]: list of new extents, as dicts
    new_writeable [int]: 1 if they are to be writeable, 0 otherwise
    returns [int]: either the expertise level of the mapping if it doesn't
                   exceed VBD_EXPERT_MODE or -1 if it does (error)
                   (0 = no write sharing, 1 = one side writes while the
                   other reads, 2 = both sides write)
    """

    xc = Xc.new()

    ##### Probe for explicitly created virtual disks and build a list
    ##### of extents for comparison with the ones that are being added

    # old_extents collects every extent already in use, as dicts carrying
    # an extra 'writeable' key
    old_extents = []

    for vbd in xc.vbd_probe():
        for vbd_ext in xc.vbd_getextents(vbd['dom'], vbd['vbd']):
            vbd_ext['writeable'] = vbd['writeable']
            old_extents.append(vbd_ext)

    ##### Now scan /proc/mounts to compile a list of extents corresponding to
    ##### any devices mounted in DOM0.  This list is added on to old_extents

    # group 1: device name below /dev, group 2: first two characters of the
    # mount options field (compared against 'rw' below)
    regexp = re.compile(r"/dev/(\S*) \S* \S* (..).*")

    fd = open('/proc/mounts', "r")
    try:
        for line in fd:
            m = regexp.match(line)

            # a line that doesn't match is not a /dev mount - skip it
            if not m:
                continue

            # lookup the device; if that fails, skip to the next mount
            ext_list = lookup_raw_partn(m.group(1))
            if not ext_list:
                continue

            # set a writeable flag as appropriate
            mounted_rw = m.group(2) == 'rw'
            for ext in ext_list:
                ext['writeable'] = mounted_rw

            old_extents.extend(ext_list)
    finally:
        fd.close()

    ##### By this point, old_extents contains a list of extents, in
    ##### dictionary format corresponding to every extent of physical
    ##### disk that's either part of an explicitly created VBD, or is
    ##### mounted under DOM0.  We now check these extents against the
    ##### proposed additions in new_extents, to see if a conflict will
    ##### happen if they are added with write status new_writeable

    level = 0   # this'll accumulate the max warning level

    # Search for clashes between the new extents and the old ones
    # Takes time O(len(new_extents) * len(old_extents))
    for new_ext in new_extents:
        new_ext_start = new_ext['start_sector']
        new_ext_end = new_ext_start + new_ext['nr_sectors'] - 1

        for old_ext in old_extents:
            if new_ext['device'] != old_ext['device']:
                continue

            old_ext_start = old_ext['start_sector']
            old_ext_end = old_ext_start + old_ext['nr_sectors'] - 1

            # Two inclusive sector ranges intersect exactly when each one
            # starts no later than the other ends.  This also catches a new
            # extent that completely encloses an old one, which a test of
            # the new extent's endpoints alone would miss.
            if new_ext_start > old_ext_end or old_ext_start > new_ext_end:
                continue

            if old_ext['writeable'] and new_writeable:
                level = max(2, level)      # two writers
            elif old_ext['writeable'] or new_writeable:
                level = max(1, level)      # one writer, one reader

    ##### level now holds the warning level incurred by the current
    ##### VBD setup and we complain appropriately to the user

    if level == 1:
        sys.stderr.write("""Warning: one or more hard disk extents
         writeable by one domain are also readable by another.""" + "\n")
    elif level == 2:
        sys.stderr.write("""Warning: one or more hard disk extents are
         writeable by two or more domains simultaneously.""" + "\n")

    if level > VBD_EXPERT_MODE:
        sys.stderr.write("""ERROR: This kind of disk sharing is not allowed
        at the current safety level (%d).""" % VBD_EXPERT_MODE + "\n")
        level = -1

    return level